# coding:utf-8
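'''
Fetch one month of historical daily weather for a city from lishi.tianqi.com
and save it as an Excel sheet.

Example page: http://lishi.tianqi.com/fuzhou/201601.html
'''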
from urllib3 import PoolManager
from bs4 import BeautifulSoup
import pandas as pd

# --- Configuration ---------------------------------------------------------
city = "fuzhou"  # city name as it appears in the site's URL path
month = 201801  # month to fetch, encoded as YYYYMM

# Headers of the output sheet, in the order the <div> cells appear inside each
# <li>: date, highest temperature, lowest temperature, weather, wind direction.
columns = ("日期", "最高气温", "最低气温", "天气", "风向")
output_path = "d:/历史天气.xlsx"

# Reference URLs kept from the original script:
#   http://tianqi.2345.com/t/wea_history/js/201905/58847_201905.js
#   http://lishi.tianqi.com/fuzhou/201801.html
url = "https://lishi.tianqi.com/%s/%d.html" % (city, month)
print(url)

# --- Download --------------------------------------------------------------
http = PoolManager()
# A timeout keeps the script from hanging forever on an unresponsive server.
result = http.request(method="GET", url=url, timeout=10.0)
if result.status != 200:
    # Keep going (an error page simply yields no rows), but say so instead of
    # silently producing an empty spreadsheet.
    print("warning: unexpected HTTP status %s for %s" % (result.status, url))

# Decode explicitly as UTF-8 and replace undecodable bytes, so a single bad
# byte does not abort the run with UnicodeDecodeError.
soup = BeautifulSoup(result.data.decode("utf-8", "replace"), "html.parser")

# --- Parse -----------------------------------------------------------------
# Expected markup: one <li> per day, one <div> per column.
#   <ul class="lishitable_content clearfix">
#     <li>
#       <div><a href="//lishi.tianqi.com/fuzhou/20180101.html">2018-01-01</a></div>
#       <div style="width: 100px">15</div>   (highest temperature)
#       <div>10</div>                        (lowest temperature)
#       <div>...</div>                       (weather description)
#       <div style="width:200px;">...</div>  (wind direction and force)
#     </li>
#     ...
data = []
table = soup.find("ul", attrs={"class": "lishitable_content clearfix"})
if table is None:
    print("warning: no weather table found at %s" % url)
else:
    for row in table.find_all("li"):
        cells = [cell.text.strip() for cell in row.find_all("div")]
        # Skip <li> elements that contain no <div> cells at all.
        if cells:
            data.append(cells)
print(data)

# --- Export ----------------------------------------------------------------
df = pd.DataFrame(data, columns=columns)
df.to_excel(output_path)
